*session setup: do not pause the output with --more-- prompts while the script runs
set more off
*start from an empty dataset and with no Stata matrices left in memory
clear
clear matrix
*set Stata's matsize setting to 1000
set matsize 1000


*load the FARE dataset for the year 2008
use ".........\fare2008.dta", clear

*attach the correspondence between nace rev 2 codes (ape_08) in FARE and the
*aggregate industry breakdown from input-output tables based on nace rev 1.1
*(many firms share one ape_08 code, hence m:1)
sort ape_08
merge m:1 ape_08 using ".....\Done2.dta"

*drop observations that cannot be assigned to an aggregate industry;
*missing() also catches extended missing values (.a-.z), which ==. would let through
drop if missing(aggregate_ind)

*drop observations with missing or zero employment (number of employees at december 31st);
*this also removes rows that came only from the correspondence file, since they carry no employment
drop if missing(empl)
drop if empl==0
rename empl empfte


*In what follows we compute and subtract "R&D related workers"

*the earlier merge on ape_08 leaves a _merge variable in memory; a second merge
*would stop with "_merge already defined", so remove it first
*(capture: do nothing if it is not there)
capture drop _merge

*the file we merge has info on the share of R&D outlays as a % of revenue (from OECD STAN)
*m:1 = many firms per aggregate industry, one R&D row per industry; Stata checks that
*aggregate_ind is unique in the using file and no pre-sorting is required.
*keep(master match) discards industries of the R&D file that have no firm in the data
merge m:1 aggregate_ind using ".....\aggregate_ind_R_D_FR.dta", keep(master match) nogenerate

*industry totals: employment and number of firms (every firm has non-missing empfte here)
egen double empfte_aggregate_ind=total(empfte), by(aggregate_ind)
egen long nfirms_aggregate_ind=count(empfte), by(aggregate_ind)

*R&D related workers of the industry (industry employment times the R&D share),
*spread equally over the firms of that industry
gen double cut=(empfte_aggregate_ind*(r_and_d_percent/100))/nfirms_aggregate_ind
su cut, de
replace empfte=empfte-cut

*we eliminate firms ending up, after accounting for "R&D related workers", with zero or negative employment;
*firms in industries without an R&D share get a missing cut and hence missing employment:
*they are removed here explicitly (missing values do not satisfy <=0)
drop if missing(empfte) | empfte<=0


*trimming step: remove both tails of the employment distribution
*the cut-offs are the 1.5th and 98.5th percentiles of empfte, computed over all remaining firms
egen trim_lo=pctile(empfte), p(1.5)
egen trim_hi=pctile(empfte), p(98.5)

*retain only firms lying strictly between the two cut-offs
*(observations with missing empfte fail the upper comparison and are removed as well)
keep if empfte>trim_lo & empfte<trim_hi
drop trim_lo trim_hi


*finally we compute what we need, industry by industry (aggregate_ind)
*all intermediate variables are stored as double: the default float type keeps only
*about 7 significant digits, which is not enough for industry employment totals and squared shares

*log employment of the firm
gen double ln_empfte=log(empfte)

*squared employment share of the firm within its industry
*(summed over firms below, this gives a Herfindahl-type concentration index)
egen double tot_emp_ind=total(empfte), by(aggregate_ind)
gen double mark_share_2=(empfte/tot_emp_ind)^2

*standard deviation and mean of log employment within the industry
egen double SD_log_emp=sd(ln_empfte), by(aggregate_ind)
egen double Mean_log_emp=mean(ln_empfte), by(aggregate_ind)

*industry mean of the fourth power of deviations from the mean, and fourth power of the standard deviation
*NOTE(review): presumably combined later as Mean_dev_mean_4/Sigma_4 (kurtosis) - confirm with downstream use
gen double dev_mean_4=(ln_empfte-Mean_log_emp)^4
egen double Mean_dev_mean_4=mean(dev_mean_4), by(aggregate_ind)
gen double Sigma_4=SD_log_emp^4

*one per firm, so that its sum is the number of firms in the industry
gen n_firms=1

*one row per industry: sums give the firm count and the sum of squared shares;
*the other variables are constant within industry, so their mean returns that constant
collapse (sum) n_firms mark_share_2 (mean) SD_log_emp Sigma_4 Mean_dev_mean_4, by(aggregate_ind)

*store the industry-level results as a Stata dataset and as an Excel workbook
*NOTE(review): export excel is called without firstrow(variables), so the sheet holds data only,
*with no header row of variable names - confirm this is what the downstream user expects
save "....\bs_data_all_fin_robust_RD.dta", replace
export excel using ".....\bs_data_all_fin_robust_RD.dta.xls", replace
